

************************************************
* LOAD AND PREP COP AND HDD FROM R CODE HANDOFF
************************************************
clear
import delimited Data_Proc\State_COP_and_HDD.csv	/* State level heat pump COP data generated by R code */
ren st state_fips
ren awcop65top cop_top
ren awcop65proto cop_proto
keep state_fips cop_top
sort state_fips
save Data_Proc\cops, replace


*********************************************
* LOAD AND PREP CPI DATA FROM R CODE HANDOFF
*********************************************
clear
import delimited Data_Proc\CPI.csv					/* Monthly CPI data generated by R code */
gen date = date(ym,"MY")
gen year = year(date)
keep if year<=2020
collapse (mean) cpi2019, by(year)
replace year = year-2000
sort year
save Data_Proc\cpi, replace


*********************************
* LOAD AND PREP ENERGY PRICES
*********************************

* STATE ABBREVIATIONS
clear
import excel "Data_Archive\EIA Prices\StatesAndPadds.xlsx", sheet("Sheet1") firstrow
sort state_postal
save Data_Proc\StatesAndPadds, replace

* AVERAGE ELECTRICITY PRICES
clear
import excel "Data_Archive\EIA Prices\sales_revenue.xlsx", sheet("Monthly-States") cellrange(A3:H8112) firstrow
* ORIGINAL DATA SOURCE: https://www.eia.gov/electricity/data/eia861m/
ren Year year
ren Month month
ren State state
ren Thou rev
ren Mega mwh
keep if year<=2020
collapse (sum) rev mwh, by(state year)
gen pe = rev/mwh
replace pe = pe/3412*1000000
keep state year pe
replace year = year-2000
sort state year
merge m:1 year using Data_Proc\cpi
tab _merge
drop _merge
replace pe = pe/cpi2019
drop cpi2019
reshape wide pe, i(state) j(year)
forvalues t=10/20 {
	gen ph`t' = pe`t'
}
ren state state_postal
sort state_postal
save Data_Proc\electricity, replace

* NATURAL GAS PRICES
clear
import excel "Data_Archive\EIA Prices\NG_PRI_SUM_A_EPG0_PRS_DMCF_A.xlsx", sheet("Data 1") cellrange(A3:BA59) firstrow
* ORIGINAL DATA SOURCE: https://www.eia.gov/dnav/ng/ng_pri_sum_a_epg0_prs_dmcf_a.htm
local states = "Alabama Alaska Arizona Arkansas California Colorado Connecticut Delaware DistrictofColumbia Florida Georgia Hawaii Idaho Illinois Indiana Iowa Kansas Kentucky Louisiana Maine Maryland Massachusetts Michigan Minnesota Mississippi Missouri Montana Nebraska Nevada NewHampshire NewJersey NewMexico NewYork NorthCarolina NorthDakota Ohio Oklahoma Oregon Pennsylvania RhodeIsland SouthCarolina SouthDakota Tennessee Texas Utah Vermont Virginia Washington WestVirginia Wisconsin Wyoming"
foreach s of local states {
	di "`s'"
	ren `s' pg`s'
}
drop USPrice
gen year = year(Date)
collapse (mean) pg*, by(year)
sort year
keep if year<=2020 & year>=2010
replace year = year - 2000
reshape long pg, i(year) j(state) string
replace pg = pg/1000/1039*1000000
sort state year
merge m:1 year using Data_Proc\cpi
tab _merge
drop _merge
replace pg = pg/cpi2019
drop cpi2019
reshape wide pg, i(state) j(year)
ren state squoosh
sort squoosh
save Data_Proc\naturalgas, replace

* HEATING OIL PRICES
clear
import excel "Data_Archive\EIA Prices\PET_PRI_WFR_A_EPD2F_PRS_DPGAL_W.xlsx", sheet("Data 1") cellrange(A3:AD713) firstrow
* ORIGINAL DATA SOURCE: http://www.eia.gov/dnav/pet/pet_pri_wfr_a_epd2f_prs_dpgal_w.htm
foreach var of varlist Weekly* {
	local newname = subinstr("`var'","Weekly","",.)
	rename `var' `newname'
}
ren NewEngland PADD1A
ren Central PADD1B
ren Lower PADD1C
drop EastCoast USNo2
local states = "Connecticut Delaware DistrictofColumbia Illinois Indiana Iowa Kentucky Maine Maryland Massachusetts Michigan Minnesota Nebraska NewHampshire NewJersey NewYork NorthCarolina Ohio Pennsylvania RhodeIsland Vermont Virginia Wisconsin PADD1A PADD1B PADD1C PADD2"
foreach var of local states {
	rename `var' po`var'
}
gen year = year(Date)
collapse (mean) po*, by(year)
sort year
keep if year<=2020 & year>=2010
replace year = year - 2000
reshape long po, i(year) j(state) string
replace po = po/138500*1000000
sort state year
merge m:1 year using Data_Proc\cpi
tab _merge
drop _merge
replace po = po/cpi2019
drop cpi2019
reshape wide po, i(state) j(year)
ren state squoosh
sort squoosh
save Data_Proc\heatingoil, replace

* PROPANE PRICES
clear
import excel "Data_Archive\EIA Prices\PET_PRI_WFR_A_EPLLPA_PRS_DPGAL_W.xlsx", sheet("Data 1") cellrange(A3:AU717) firstrow
* ORIGINAL DATA SOURCE: http://www.eia.gov/dnav/pet/pet_pri_wfr_a_epllpa_prs_dpgal_w.htm
foreach var of varlist Weekly* {
	local newname = subinstr("`var'","Weekly","",.)
	rename `var' `newname'
}
ren NewEng PADD1A
ren Central PADD1B
ren Lower PADD1C
ren Midwest PADD2
ren Gulf PADD3
ren Rocky PADD4
drop EastCoast USPropane
local states = "Alabama Arkansas Colorado Connecticut Delaware Florida Georgia Idaho Illinois Indiana Iowa Kansas Kentucky Maine Maryland Massachusetts Michigan Minnesota Mississippi Missouri Montana Nebraska NewHampshire NewJersey NewYork NorthCarolina NorthDakota Ohio Oklahoma Pennsylvania RhodeIsland SouthDakota Tennessee Texas Utah Vermont Virginia Wisconsin PADD1A PADD1B PADD1C PADD2 PADD3 PADD4"
foreach var of local states {
	rename `var' pp`var'
}
gen year = year(Date)
collapse (mean) pp*, by(year)
sort year
keep if year<=2020 & year>=2010
replace year = year - 2000
reshape long pp, i(year) j(state) string
replace pp = pp/91452*1000000
sort state year
merge m:1 year using Data_Proc\cpi
tab _merge
drop _merge
replace pp = pp/cpi2019
drop cpi2019
reshape wide pp, i(state) j(year)
ren state squoosh
sort squoosh
save Data_Proc\propane, replace


*********************************
* LOAD AND PREP RECS DATASETS
*********************************
clear

* LOAD AND PREP RACE DATA FOR 2020
import delimited "Data_Archive\RECS\recs2020_public_v4.csv"
* ORIGINAL DATA SOURCE: https://www.eia.gov/consumption/residential/data/2020/
ren householder_race race
gen twenty = 20
egen temp = concat(doeid twenty), punc(-)
drop doeid
gen doeid = temp
keep doeid race
destring race, force replace
sort doeid
save Data_Proc\recs2020_race, replace
clear

* LOAD AND PREP RACE DATA FOR 2009
import delimited "Data_Archive\RECS\recs2009_public.csv"
* ORIGINAL DATA SOURCE: https://www.eia.gov/consumption/residential/data/2009/
ren householder_race race
gen nine = "09"
egen temp = concat(doeid nine), punc(-)
drop doeid
gen doeid = temp
keep doeid race
destring race, force replace
sort doeid
save Data_Proc\recs2009_race, replace
append using Data_Proc\recs2020_race
sort doeid
save Data_Proc\recs20092020_race, replace
clear



*********************************
* PREPARE TO APPEND 2009-2020
*********************************

* LOAD DATA FROM R HANDOFF (RECS 2009, 2015, AND 2020)
import delimited "Data_Proc\RECS.csv"

* BASIC CLEANING: HOME AGE, EQUIPMENT AGE
replace yearmaderange = "2000 to 2009" if yearmaderange=="2000 to 2004" | yearmaderange=="2005 to 2009"
replace yearmaderange = "2010 to 2020" if yearmaderange=="2010 to 2015" | yearmaderange=="2016 to 2020"
replace equipage = "20 years or older" if equipage=="20 years old"
replace equipage = "10 to 19 years old" if equipage=="10 to 14 years old" | equipage=="15 to 19 years old"
replace equipage = "NA" if equipage=="Not applicable"
save Data_Proc\recs200920152020, replace
clear




***********************************
* SPLIT 2009, 2015, AND 2020
***********************************
foreach x of numlist 2009 2015 2020 {
	use Data_Proc\recs200920152020
	keep if recs==`x'
	save Data_Proc\recs`x', replace
	clear
}




***********************************
* MERGE ENERGY PRICES TO RECS 2020
***********************************
clear

* OPEN RECS 2020
use Data_Proc\recs2020

* MERGE IN STATE ABBREVIATIONS AND FIPS
sort state_postal
merge m:1 state_postal using Data_Proc\StatesAndPadds
drop _merge

* CREATE REPORTABLE GEOGRAPHY TO MATCH RECS 2009
replace reportable_domain = "Connecticut, Maine, New Hampshire, Rhode Island, Vermont" if state_postal=="CT" | state_postal=="ME" | state_postal=="NH" | state_postal=="RI" | state_postal=="VT"
replace reportable_domain = "Massachusetts" if state_postal=="MA"
replace reportable_domain = "New York" if state_postal=="NY"
replace reportable_domain = "New Jersey" if state_postal=="NJ"
replace reportable_domain = "Pennsylvania" if state_postal=="PA"
replace reportable_domain = "Illinois" if state_postal=="IL"
replace reportable_domain = "Indiana, Ohio" if state_postal=="IN" | state_postal=="OH"
replace reportable_domain = "Michigan" if state_postal=="MI"
replace reportable_domain = "Wisconsin" if state_postal=="WI"
replace reportable_domain = "Iowa, Minnesota, North Dakota, South Dakota" if state_postal=="IA" | state_postal=="MN" | state_postal=="ND" | state_postal=="SD"
replace reportable_domain = "Kansas, Nebraska" if state_postal=="KS" | state_postal=="NE"
replace reportable_domain = "Missouri" if state_postal=="MO"
replace reportable_domain = "Virginia" if state_postal=="VA"
replace reportable_domain = "Delaware, District of Columbia, Maryland, West Virginia" if state_postal=="DE" | state_postal=="DC" | state_postal=="MD" | state_postal=="WV"
replace reportable_domain = "Georgia" if state_postal=="GA"
replace reportable_domain = "North Carolina, South Carolina" if state_postal=="NC" | state_postal=="SC"
replace reportable_domain = "Florida" if state_postal=="FL"
replace reportable_domain = "Alabama, Kentucky, Mississippi" if state_postal=="AL" | state_postal=="KY" | state_postal=="MS"
replace reportable_domain = "Tennessee" if state_postal=="TN"
replace reportable_domain = "Arkansas, Louisiana, Oklahoma" if state_postal=="AR" | state_postal=="LA" | state_postal=="OK"
replace reportable_domain = "Texas" if state_postal=="TX"
replace reportable_domain = "Colorado" if state_postal=="CO"
replace reportable_domain = "Idaho, Montana, Utah, Wyoming" if state_postal=="ID" | state_postal=="MT" | state_postal=="UT" | state_postal=="WY"
replace reportable_domain = "Arizona" if state_postal=="AZ"
replace reportable_domain = "Nevada, New Mexico" if state_postal=="NV" | state_postal=="NM"
replace reportable_domain = "California" if state_postal=="CA"
replace reportable_domain = "Alaska, Hawaii, Oregon, Washington" if state_postal=="AK" | state_postal=="HI" | state_postal=="OR" | state_postal=="WA"

* MERGE IN ELECTRICITY PRICES
sort state_postal
merge m:1 state_postal using Data_Proc\electricity
drop _merge

* MERGE IN NATURAL GAS PRICES
sort squoosh
merge m:1 squoosh using Data_Proc\naturalgas
drop _merge

* MERGE IN HEATING OIL PRICES
gen temp = squoosh
replace squoosh = padd if heatingoil==.
sort squoosh
merge m:1 squoosh using Data_Proc\heatingoil
drop if _merge==2
drop _merge
replace squoosh = temp
drop temp

* MERGE IN PROPANE PRICES
gen temp = squoosh
replace squoosh = padd if propane==.
sort squoosh
merge m:1 squoosh using Data_Proc\propane
drop if _merge==2
replace squoosh = temp
drop temp
drop _merge

* MERGE IN COP FOR HEAT PUMPS
sort state_fips
merge m:1 state_fips using Data_Proc\cops
tab state_fips _merge
drop if _merge==2
drop _merge

save Data_Proc\recs_temp2, replace
clear

* COLLAPSE PRICES TO REPORTABLE_DOMAIN AVERAGE
use Data_Proc\recs_temp2
drop permelec
keep reportable_domain ph* pe* pg* pp* po* cop_top nweight
replace cop_top = 1/cop_top		/* Invert for harmonic averaging */
collapse (mean) ph* pe* pg* pp* po* cop_top [pw = nweight], by(reportable_domain)
replace cop_top = 1/cop_top		/* Invert back to levels */
sort reportable_domain
save Data_Proc\recs_domainprices, replace
clear


***********************************
* APPEND OLDER DATASETS
***********************************
use Data_Proc\recs_temp2
append using Data_Proc\recs2015
append using Data_Proc\recs2009


***************************************
* MERGE RACE FOR RECS 2009 & RECS 2020
**************************************
sort doeid
merge 1:1 doeid using Data_Proc\recs20092020_race
tab _merge
drop _merge


***************************************
* CREATE NUMERICAL DOMAIN VARIABLE
***************************************
gen domain = 1 if reportable_domain =="Connecticut, Maine, New Hampshire, Rhode Island, Vermont"
replace domain = 2 if reportable_domain =="Massachusetts"
replace domain = 3 if reportable_domain =="New York"
replace domain = 4 if reportable_domain =="New Jersey"
replace domain = 5 if reportable_domain =="Pennsylvania"
replace domain = 6 if reportable_domain =="Illinois"
replace domain = 7 if reportable_domain =="Indiana, Ohio"
replace domain = 8 if reportable_domain =="Michigan"
replace domain = 9 if reportable_domain =="Wisconsin"
replace domain = 10 if reportable_domain =="Iowa, Minnesota, North Dakota, South Dakota"
replace domain = 11 if reportable_domain =="Kansas, Nebraska"
replace domain = 12 if reportable_domain =="Missouri"
replace domain = 13 if reportable_domain =="Virginia"
replace domain = 14 if reportable_domain =="Delaware, District of Columbia, Maryland, West Virginia"
replace domain = 15 if reportable_domain =="Georgia"
replace domain = 16 if reportable_domain =="North Carolina, South Carolina"
replace domain = 17 if reportable_domain =="Florida"
replace domain = 18 if reportable_domain =="Alabama, Kentucky, Mississippi"
replace domain = 19 if reportable_domain =="Tennessee"
replace domain = 20 if reportable_domain =="Arkansas, Louisiana, Oklahoma"
replace domain = 21 if reportable_domain =="Texas"
replace domain = 22 if reportable_domain =="Colorado"
replace domain = 23 if reportable_domain =="Idaho, Montana, Utah, Wyoming"
replace domain = 24 if reportable_domain =="Arizona"
replace domain = 25 if reportable_domain =="Nevada, New Mexico"
replace domain = 26 if reportable_domain =="California"
replace domain = 27 if reportable_domain =="Alaska, Hawaii, Oregon, Washington"

save Data_Proc\recs_allyears, replace
clear


**************
* DATA PREP
**************
use Data_Proc\recs_allyears

* DROP AK AND HI OBSERVATIONS
tab iecc if domain==27
tab iecc recs if domain==27
drop if domain==27 & (iecc=="1A" | iecc=="1A-2A" | iecc=="7A-7B-7AK-8AK" | iecc=="7AK" | iecc=="8AK")	/* Drop AK and HI climate zones from AK, HI, OR, and WA domain */
drop if state=="Alaska" | state=="Hawaii"

* RECS YEAR DUMMIES
foreach y of numlist 2009 2015 2020 {
	gen recs`y' = (recs==`y')
	label var recs`y' "RECS `y'"
}

* HEATING FUEL TYPE
gen fuel = 1 if fuelheat=="Electricity"
replace fuel = 2 if fuelheat=="Natural Gas" | fuelheat=="Natural gas from underground pipes"
replace fuel = 3 if fuelheat=="Bottled gas (LPG or propane)" | fuelheat=="Propane (bottled gas)" | fuelheat=="Propane/LPG"
replace fuel = 4 if fuelheat=="Fuel oil/kerosene" | fuelheat=="Kerosene" | fuelheat=="Fuel Oil" | fuelheat=="Fuel oil"
replace fuel = 5 if fuelheat=="Wood" | fuelheat=="Wood (cordwood or pellets)"
replace fuel = 6 if fuel==.

* ELECTRIC HEAT PUMPS
tab equipm
gen hp = (equipm=="Heat Pump" | equipm =="Heat pump" | equipm=="Central Heat pump" | equipm=="Ductless heat pump") & fuel==1
la var hp "Heat pump"

* HEATING
gen heating = fuel
replace heating = 0 if hp==1
la def heating 0 "Heat pump" 1 "Electricity" 2 "Natural gas" 3 "Propane" 4 "Heating oil" 5 "Wood" 6 "Other/none"
label values heating heating

* HEATING DUMMIES
gen elec = (heating==1)
gen ngas = (heating==2)
gen prop = (heating==3)
gen kero = (heating==4)
gen wood = (heating==5)
gen onht = (heating==6)
gen heat = 1 - onht
la var elec "Other electricity"
la var ngas "Natural gas"
la var kero "Heating oil"
la var prop "Propane"
la var wood "Wood"

* NUMERICAL HOME VINTAGE
split yearmaderange, parse(" to ")
destring yearmaderange1, gen(vintage) force
replace vintage = 1940 if vintage==.
tab yearmaderange vintage
label define vintage 1940 "pre-1950" 1950 "1950s" 1960 "1960s" 1970 "1970s" 1980 "1980s" 1990 "1990s" 2000 "2000s" 2010 "2010s"
label values vintage vintage

* VINTAGE DUMMIES
forvalues v = 1940(10)2010 {
	gen vintage`v' = (vintage==`v')
}

* RECS - VINTAGE GAP
gen gap = recs - vintage - 10

* NUMERICAL EQUIPMENT AGE
gen age = .
replace age = 0 if equipage=="Less than 2 years old"
replace age = 2 if equipage=="2 to 4 years old"
replace age = 5 if equipage=="5 to 9 years old"
replace age = 10 if equipage=="10 to 19 years old"
replace age = 20 if equipage=="20 years or older"
label define age 0 "<2" 2 "2-4" 5 "5-9" 10 "10-19" 20 "20+"
label values age age

* AGE RANGE DUMMIES
gen age0 = (age==0)
gen age2 = (age==2)
gen age5 = (age==5)
gen age10 = (age==10)
gen age20 = (age==20)
la var age0 "Age <2"
la var age2 "Age 2-4"
la var age5 "Age 5-9"
la var age10 "Age 10-19"
la var age20 "Age 20+"

* CUMULATIVE AGE DUMMIES
gen ageu0 = (age<2)
gen ageu1 = (age<5)
gen ageu2 = (age<10)
gen ageu3 = (age<20)
gen ageu4 = 1
la var ageu0 "Age <2"
la var ageu1 "Age <5"
la var ageu2 "Age <10"
la var ageu3 "Age <20"
la var ageu4 "Total"

* NEW EQUIPMENT DUMMY: HEATING EQUIPMENT YOUNGER THAN THE HOUSE
gen maxage = 2 if age==0
replace maxage = 4 if age==2
replace maxage = 9 if age==5
replace maxage = 19 if age==10
replace maxage = 99 if age==20
gen minagehouse = recs - vintage - 10 if recs==2020
replace minagehouse = recs - vintage - 5 if recs==2015
replace minagehouse = recs - vintage - 9 if recs==2009
gen newerequip = (maxage<minagehouse)
la var newerequip "Age < House age"

* CENSUS [FIXING UNCAPTURED MIDWEST FROM R HANDOFF]
replace regionc = "Midwest" if regionc=="NA" | regionc=="Midwest Census Region"
replace regionc = "Northeast" if regionc=="Northeast Census Region"
replace regionc = "South" if regionc=="South Census Region"
replace regionc = "West" if regionc=="West Census Region"
ren regionc region

* GEN REGION
gen regionc=1 if region=="Northeast"
replace regionc=2 if region=="Midwest"
replace regionc=3 if region=="South"
replace regionc=4 if region=="West"
la define region 1 "Northeast" 2 "Midwest" 3 "South" 4 "West"
label values regionc region

* CENSUS DIVISION
local divisions ""New England" "Middle Atlantic" "East North Central" "West North Central" "South Atlantic" "East South Central" "West South Central" "Mountain North" "Mountain South" "Pacific""
foreach d of local divisions {
	replace division = "`d'" if strpos(division,"`d'")>0
}
gen divisionc = 1 if division=="New England"
replace divisionc = 2 if division=="Middle Atlantic"
replace divisionc = 3 if division=="East North Central"
replace divisionc = 4 if division=="West North Central"
replace divisionc = 5 if division=="South Atlantic"
replace divisionc = 6 if division=="East South Central"
replace divisionc = 7 if division=="West South Central"
replace divisionc = 8 if division=="Mountain North"
replace divisionc = 9 if division=="Mountain South"
replace divisionc = 10 if division=="Pacific"
gen div = subinstr(division," ","",.)

* RURAL
gen rural = (uatyp10=="Rural" | uatyp10=="Rural area" | uatyp10=="R") if recs>=2009

* OWNER OCCUPIED
gen own = 1 - (kownrent=="Rented") - (kownrent=="Occupy without payment of rent")
replace own = . if recs<=2005
la var own "Owner"

* ADD FUEL TYPE WEIGHTS FOR 2020 PRICES
save Data_Proc\recs_temp, replace
keep if recs==2009 | recs==2020
keep if heating>=0 & heating<5
collapse (mean) hp elec ngas prop kero [pw = nweight], by(reportable_domain)
foreach v of varlist hp elec ngas prop kero {
	ren `v' s`v'
}
sort reportable_domain
save Data_Proc\price_weights, replace
clear
use Data_Proc\recs_temp
sort reportable_domain
merge m:1 reportable_domain using Data_Proc\price_weights
drop if _merge==2
drop _merge

* INCOME
gen temp = moneypy if recs==2020
destring temp, force replace
gen income = 1
replace income = 2 if temp>=30000
replace income = 3 if temp>=40000
replace income = 4 if temp>=50000
replace income = 5 if temp>=60000
replace income = 6 if temp>=75000
replace income = 7 if temp>=100000
replace income = 8 if temp>=150000
replace income = . if temp==.
gen highinc = (income>4)

* INCOME GROUPS
gen incomeq = 1 if recs==2009 | recs==2020
replace incomeq = 2 if temp>=30000 & recs==2020
replace incomeq = 2 if moneyp=="$25,000 to $29,999" & recs==2009
replace incomeq = 2 if moneyp=="$30,000 to $34,999" & recs==2009
replace incomeq = 2 if moneyp=="$35,000 to $39,999" & recs==2009
replace incomeq = 2 if moneyp=="$40,000 to $44,999" & recs==2009
replace incomeq = 2 if moneyp=="$45,000 to $49,999" & recs==2009
replace incomeq = 3 if temp>=60000 & recs==2020
replace incomeq = 3 if moneyp=="$50,000 to $54,999" & recs==2009
replace incomeq = 3 if moneyp=="$55,000 to $59,999" & recs==2009
replace incomeq = 3 if moneyp=="$60,000 to $64,999" & recs==2009
replace incomeq = 3 if moneyp=="$65,000 to $69,999" & recs==2009
replace incomeq = 3 if moneyp=="$70,000 to $74,999" & recs==2009
replace incomeq = 3 if moneyp=="$75,000 to $79,999" & recs==2009
replace incomeq = 3 if moneyp=="$80,000 to $84,999" & recs==2009
replace incomeq = 4 if temp>=100000 & recs==2020
replace incomeq = 4 if moneyp=="$85,000 to $89,999" & recs==2009
replace incomeq = 4 if moneyp=="$90,000 to $94,999" & recs==2009
replace incomeq = 4 if moneyp=="$95,000 to $99,999" & recs==2009
replace incomeq = 4 if moneyp=="$100,000 to $119,999" & recs==2009
replace incomeq = 4 if moneyp=="$120,000 or More" & recs==2009
la def incomelab 1 "0-29k" 2 "30-59k" 3 "60-99k" 4 "100k+"
label values incomeq incomelab

* RACE
gen white = (race==1)
gen black = (race==2)
gen native = (race==3)
gen asian = (race==4)
gen other = (race==5 | race==6 | race==7)
la var white "White"
la var black "Black"
la var black "Nat.Am."
la var asian "Asian"
la var other "Other"


**************************************************************************************************************
* MERGE ENERGY PRICES BY DIVISION (OMIT THIS SECTION TO USE STATE-LEVEL ENERGY PRICES MATCHED TO 2020 RECS)
**************************************************************************************************************
drop ph* pe* pg* pp* po* cop_top
sort reportable_domain
merge m:1 reportable_domain using Data_Proc\recs_domainprices
tab _merge
tab reportable_domain _merge
drop _merge

* CHOOSE COP FOR HEAT PUMPS & OTHER TECHS
gen coph = cop_top
destring coph, force replace
gen cope = 1
gen copg = 0.93
gen copo = 0.90
gen copp = 0.85

* RE-CALCULATE ENERGY PRICES ADJUSTING FOR OUTPUT HEATING ($/MMBTU)
local fuels "h e g o p"
foreach j of local fuels {
	forvalues t=10/20 {
		replace p`j'`t' = p`j'`t'/cop`j'
	}
}

* CALCULATE TOTAL INPUT AND EXIT ENERGY FOR SPACE HEATING FROM ELECTRICITY AND FOSSIL FUELS (MMBTU)
gen btuin = totalbtusph/1000
gen btuex = btuin*(cope*(heating==1) + copg*(heating==2) + copp*(heating==3) + copo*(heating==4)) /* no heat pumps */
gen lnbtuex = ln(btuex)

* CALCULATE ENERGY DEMAND PREDICTORS
gen lnsqft = ln(tothsqft)
gen lnhdd = ln(hdd65)

* GENERATE COMMON CLIMATE ZONE ACROSS 2009, 2015, AND 2020 RECS
gen combo_iecc = iecc
replace combo_iecc = "1A-2A" if iecc=="1A"
replace combo_iecc = "1A-2A" if iecc=="2A"
replace combo_iecc = "3B-4B" if iecc=="3B"
replace combo_iecc = "3B-4B" if iecc=="4B"
replace combo_iecc = "5B-5C" if iecc=="5B"
replace combo_iecc = "5B-5C" if iecc=="5C"
replace combo_iecc = "6A-6B" if iecc=="6A"
replace combo_iecc = "6A-6B" if iecc=="6B"
replace combo_iecc = "7A-7B-7AK-8AK" if iecc=="7A"
replace combo_iecc = "7A-7B-7AK-8AK" if iecc=="7B"
encode combo_iecc, gen(iecc_code)

* MODEL HEATING DEMAND
poisson btuex lnsqft lnhdd i.vintage i.iecc_code /// 
	if btuex>0 & (heating==1 | heating==2 | heating==4 | heating==4) & recs>=2009 /* All RECS, excludes heat pumps */
predict yhat
replace lnhdd = ln(hdd30yr)
predict btuexhat			/* Predict demand using 30-yr climate hdd */
reg btuex yhat if btuex>0 	/* Show that actual and predicted line up closely */
twoway (scatter yhat hdd65 if tothsqft>1500 & tothsqft<1600 & vintage==2000) (scatter yhat hdd65 if tothsqft>1000 & tothsqft<1100 & vintage==2000) /* Show what function looks like mmbtu vs. hdd */

* ANNUAL ENERGY COSTS VIA EACH FUEL TYPE (NOTE THAT BOTH DEMAND AND PRICES ARE IN EXIT BTUS)
local fuels "h e g o p"
foreach j of local fuels {
	forvalues t=10/20 {
		gen c`j'`t' = btuexhat*p`j'`t'
	}
}

* CALCULATE PRICE CHANGES
*local prices = "pe ph pg po pp"
local prices = "ce ch cg co cp"
foreach v of local prices {
	gen `v'  = (`v'19 + `v'18 + `v'17 + `v'16 + `v'15 + `v'14 + `v'13 + `v'12 + `v'11 + `v'10)/10	/* 2010-19 */
	sum `v' if recs==2020 | recs==2009 [iw = nweight]
	gen ln`v' = ln(`v'/r(mean))			/* Log costs normalized to zero at mean */
	replace ln`v' = 0 if ln`v'==. & (recs==2020 | recs==2009)
}
gen lnca = (shp*lnch + selec*lnce + sngas*lncg + skero*lnco + sprop*lncp)/(shp + selec + sngas + skero + sprop)
gen lncb = (selec*lnce + sngas*lncg + skero*lnco + sprop*lncp)/(selec + sngas + skero + sprop)	
/* Wgt avg log cost for non-HP */
gen lncr = lncb - lnch																			
/* Wgt avg log cost for HP */


save Data_Proc\recs_clean, replace
clear

exit
